###########################################################################################################
# Replication for county-level analysis for Bavaria presented in Table A4 (Religious diversity in 1939 and 1950)
# Charnysh and Schaub, Migration and Social Change: Evidence from post-WWII Displacement in Germany, JOP
# R version 4.3.1 (2023-06-16) 
###########################################################################################################

rm(list = ls())
library(foreign)
library(openxlsx)
library(dplyr)
library(sandwich)
library(lmtest)
library(stargazer)


dat<-read.xlsx("county_data/dataset19391950.xlsx")

dat[which(dat$KREIS50=="5536000"), c("Pop1950", "Evang1950", "Kath1950", "Judische1950", "Heimatvertriebene1950", "VertrEvang1950", "VertrKath1950")]
#Correct Evang1950 for Landkreis Münster based on Gemeindestatistik Nordheim Westfalen, pages 1-23 
dat$Evang1950[which(dat$KREIS50=="5536000")]<-13184 

##Variables for 1950:
dat$srefugees<-dat$Heimatvertriebene1950/dat$Pop1950
dat$sprotexp<-dat$VertrEvang1950/dat$Heimatvertriebene1950 #Share Protestant expellees
dat$scathexp<-dat$VertrKath1950/dat$Heimatvertriebene1950 #Share Catholic expellees
dat$sotherexp<-(dat$Heimatvertriebene1950-dat$VertrEvang1950-dat$VertrKath1950)/dat$Heimatvertriebene1950
dat[which(dat$sotherexp<0), c("KREIS50", "Kreis","Heimatvertriebene1950", "VertrEvang1950", "VertrKath1950")] #some below zero, fix
dat$sotherexp<-ifelse(dat$sotherexp<0, 0, dat$sotherexp)  #Assumption that these are zeros.
dat$natives<-dat$Pop1950-dat$Heimatvertriebene1950 #
dat$OtherReligion1950<-dat$Pop1950-dat$Kath1950-dat$Evang1950



dat$scathnative<-(dat$Kath1950-dat$VertrKath1950)/dat$natives #Share Catholic natives
dat$sprotnative<-(dat$Evang1950-dat$VertrEvang1950)/dat$natives #Share Protestant natives
dat$sothernative<-(dat$OtherReligion1950-(dat$Heimatvertriebene1950-dat$VertrEvang1950-dat$VertrKath1950))/dat$natives #Share Other natives
dat[which(dat$sothernative<0),]  #all fine    
dat$nativefrac1950<-1-(dat$scathnative^2+dat$sprotnative^2+dat$sothernative^2)
summary(dat$nativefrac1950)
dat$reffrac1950<-1-(dat$scathexp^2+dat$sprotexp^2+dat$sotherexp^2)
summary(dat$reffrac1950)


dat$sprotestants1939<-dat$Evangelische1939/dat$Population1939
dat$scatholics1939<-dat$Katholische1939/dat$Population1939
dat$OtherReligion1939<-dat$Population1939-dat$Katholische1939-dat$Evangelische1939
dat$sother1939<-dat$OtherReligion1939/dat$Population1939
dat$sprotestants1950<-dat$Evang1950/dat$Pop1950
dat$scatholics1950<-dat$Kath1950/dat$Pop1950
dat$sother1950<-(dat$Pop1950-dat$Kath1950-dat$Evang1950)/dat$Pop1950

dat$relfrac1939<-1-(dat$sprotestants1939^2+dat$scatholics1939^2+dat$sother1939^2)
summary(dat$relfrac1939)
dat$relfrac1950<-1-(dat$sprotestants1950^2+dat$scatholics1950^2+dat$sother1950^2)


dat$DeltaProt<-dat$sprotestants1950-dat$sprotestants1939
dat$DeltaCath<-dat$scatholics1950-dat$scatholics1939
dat$DeltaDiv<-dat$relfrac1950-dat$relfrac1939




##At the county level: did Catholic and Protestant shares of natives stay the same between 1939 and 1950? (reported on p. 12 in the paper)
cor.test(dat$relfrac1939, dat$nativefrac1950) #correlation of 0.98 (p<0.0001).
cor.test(dat$relfrac1939[dat$Land=="Bavaria"], dat$nativefrac1950[dat$Land=="Bavaria"]) #correlation of 0.98 (p<0.0001) 


#calculate religious distance following Braun and Dwenger (2020): 
dat$ReligDistanceV1<-sqrt((dat$scathnative-dat$scathexp)^2+(dat$sprotnative-dat$sprotexp)^2+(dat$sothernative-dat$sotherexp)^2) #+(data_1987m2$ShareOtherNatives-data_1987m2$ShareOtherExpellees)^2
summary(dat$ReligDistanceV1) 
dat$ReligDistanceV1[which(dat$ReligDistanceV1>1)]<-1 #three are above one, cap them at one. 
summary(dat$ReligDistanceV1)



##############################################################################################
######## Table A4: Religoius diversity in 1939 and 1950 by native and expellee status  #######
##############################################################################################

m1<-lm(nativefrac1950~relfrac1939, data=dat) 
m1.c <- coeftest(m1, vcov=vcovHC(m1, type="HC1"))

m2<-lm(nativefrac1950~relfrac1939+reffrac1950, data=dat) 
m2.c <- coeftest(m2, vcov=vcovHC(m2, type="HC1"))

m3<-lm(nativefrac1950~relfrac1939+srefugees, data=dat) 
m3.c <- coeftest(m3, vcov=vcovHC(m3, type="HC1"))

m4<-lm(relfrac1950~srefugees+nativefrac1950, data=dat) 
m4.c <- coeftest(m4, vcov=vcovHC(m4, type="HC1"))

m5<-lm(relfrac1950~srefugees+ReligDistanceV1+nativefrac1950, data=dat) 
m5.c <- coeftest(m5, vcov=vcovHC(m5, type="HC1"))

m6<-lm(relfrac1950~srefugees+ReligDistanceV1+I(ReligDistanceV1*srefugees)+nativefrac1950, data=dat) 
m6.c <- coeftest(m6, vcov=vcovHC(m6, type="HC1"))

stargazer(m1, m2, m3, m4, m5, m6,
          se = list(m1.c[, 2], m2.c[, 2], m3.c[, 2], m4.c[, 2], m5.c[, 2], m6.c[, 2]),
          p = list(m1.c[, 4], m2.c[, 4], m3.c[, 4], m4.c[, 4], m5.c[, 4], m6.c[, 4]), 
          covariate.labels = c("Religious fractionalization (1939)", 
                               "Expellee fractionalization (1950)", 
                               "Share expellees (1950)",
                               "Religious distance (1950)", 
                               "I(Religious distance* Share expellees)", 
                               "Natives fractionalization (1950)"),
          omit.stat = c("LL", "ser", "f"),
          no.space=TRUE
)
